# coding=utf-8
# Stores information related to the jaspar3 motif database so that other programs can use it.
import os
import sys
from MAmotif_pkg import MotifScanPeak
import pandas

# Names of the jaspar3 motifs known to this module.
# read_motifscan_result() in this file uses this list as the default value of
# its `motifs` argument and looks up the column '<name>.tarnum' for each entry,
# so the spelling of every name has to match the MotifScan result columns.
# NOTE(review): the list has 196 entries, but 'Nkx2-5' occurs twice, so only
# 195 names are distinct -- confirm whether one occurrence should be a
# different motif.
jaspar3_name = ['SOX10',  # 196 motifs in total
                'Sox3',
                'Sox2',
                'Sox6',
                'SOX9',
                'ZNF263',
                'MZF1_5-13',
                'RREB1',
                'Sox17',
                'Pou5f1',
                'POU2F2',
                'TBP',
                'ZEB1',
                'JUND',
                'EGR2',
                'E2F6',
                'Tcfcp2l1',
                'Bhlhe40',
                'PPARG::RXRA',
                'MAX',
                'Pax4',
                'NR2C2',
                'REST',
                'Foxo1',
                'ESR1',
                'CEBPB',
                'MEF2A',
                'Rrxa',  # NOTE(review): possibly a typo for 'Rxra' -- verify against the MotifScan column names
                'FOXA1',
                'Tcf12',
                'Tcf3',
                'RUNX1',
                'FOXC1',
                'PPARG',
                'RUNX2',
                'Gfi',
                'NHLH1',
                'EWSR1-FLI1',
                'YY1',
                'FOXI1',
                'MAFF',
                'HNF4G',
                'INSM1',
                'JUN',
                'Myod1',
                'TEAD1',
                'Hoxc9',
                'CEBPA',
                'USF2',
                'MYC::MAX',
                'CREB1',
                'NFIL3',
                'PLAG1',
                'SRF',
                'FOXO3',
                'TP53',
                'Pax6',
                'RXR::RAR_DR5',
                'SPIB',
                'TAL1::GATA1',
                'HNF4A',
                'CDX2',
                'FOXL1',
                'Foxa2',
                'EBF1',
                'Nkx3-2',
                'HLF',
                'Nobox',
                'Atoh1',
                'FOXP1',
                'SRY',
                'NF-kappaB',
                'FOXF2',
                'NFE2L1::MafG',
                'NFATC2',
                'Foxd3',
                'Nkx2-5',
                'ESR2',
                'HSF1',
                'NR2F1',
                'PRDM1',
                'FOXD1',
                'T',
                'RFX5',
                'Myog',
                'Stat6',
                'GATA3',
                'Myc',
                'BRCA1',
                'PAX5',
                'Foxq1',
                'TP63',
                'Sox5',
                'IRF1',
                'DUX4',
                'Spz1',
                'TAL1::TCF3',
                'En1',
                'Lhx3',
                'MEF2C',
                'PBX1',
                'Gata1',
                'HNF1B',
                'Nkx2-5',  # NOTE(review): second occurrence of 'Nkx2-5' in this list -- confirm intent
                'MAFK',
                'Evi1',
                'MZF1_1-4',
                'AR',
                'Mycn',
                'USF1',
                'FOXH1',
                'Rfx1',
                'TLX1::NFIC',
                'TFAP2C',
                'Gfi1b',
                'NFKB1',
                'GATA2',
                'RXRA::VDR',
                'ELF5',
                'TCF7L2',
                'TFAP2A',
                'Bcl6',
                'NFYA',
                'Spi1',
                'Myb',
                'Nr5a2',
                'Nr1h3::Rxra',
                'CTCF',
                'HOXA5',
                'Ddit3::Cebpa',
                'Nfe2l2',
                'Crx',
                'Gata4',
                'RELA',
                'NFYB',
                'RORA_2',
                'STAT2::STAT1',
                'Arnt',
                'ZNF354C',
                'EGR1',
                'Zfp423',
                'Meis1',
                'NR1H2::RXRA',
                'Mafb',
                'NR4A2',
                'IRF2',
                'E2F4',
                'Arnt::Ahr',
                'SMAD2::SMAD3::SMAD4',
                'BATF::JUN',
                'NKX3-1',
                'znf143',
                'Esrrb',
                'Hltf',
                'Nr2e3',
                'NR3C1',
                'RORA_1',
                'E2F1',
                'SP1',
                'NFE2::MAF',
                'REL',
                'HNF1A',
                'FEV',
                'Erg',
                'Hand1::Tcfe2a',
                'STAT3',
                'Stat5a::Stat5b',
                'Pdx1',
                'ARID3A',
                'JUN_var.2',
                'E2F3',
                'HIF1A::ARNT',
                'ZBTB33',
                'AP1',
                'Stat4',
                'Pax2',
                'Prrx2',
                'NFIC',
                'JUNB',
                'FOSL1',
                'JUND_var.2',
                'STAT1',
                'FOSL2',
                'Ets1',
                'SP2',
                'FOS',
                'Zfx',
                'Klf1',
                'NRF1',
                'MIZF',
                'FLI1',
                'Klf4',
                'ELK1',
                'GABPA',
                'ELK4',
                'ELF1']


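# Read a MotifScan result with all jaspar3 motifs (not one specific motif, as
# MAmotif_pkg.MAmotifIO.read_motifscan_result does).
# This function reads the target-number results of all jaspar3 motifs in one
# pass, instead of reading only the target number of a single specified motif
# as MAmotif_pkg does. That way the matching between MAnorm peaks and the
# MotifScan result only has to be done once, and all peaks can then be
# classified by whether or not they contain a given motif.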
def read_motifscan_result(motifscan_result, motifs=jaspar3_name):
    """Read a pickled MotifScan result table into a list of MotifScanPeak objects.

    Every row of the table becomes one ``MotifScanPeak`` built from the
    ``chr``, ``start``, ``end`` and ``summit`` columns.  For each name in
    ``motifs`` the column ``'<name>.tarnum'`` is read as an integer target
    number; the names and their target numbers are then attached to the peak
    with ``set_motif_info``.

    :param motifscan_result: path (or whatever ``pandas.read_pickle`` accepts)
        of the pickled MotifScan result table.
    :param motifs: motif names to look up; defaults to ``jaspar3_name``.
    :return: list of ``MotifScanPeak`` objects, one per row, in row order.

    A motif whose ``'<name>.tarnum'`` value is missing or cannot be converted
    to ``int`` gets ``None`` as its target number, and a message is printed
    once per such motif.  Progress is written to stdout as ``i/total``.
    """
    print('reading motifscan result, start!')
    # NOTE(review): read_pickle unpickles arbitrary objects; only pass files
    # that come from a trusted source.
    motifscan = pandas.read_pickle(motifscan_result)  # from Wang Jiawei MotifScan result
    motifscan_peak_list = []
    row_num = motifscan.shape[0]
    # Motifs already reported as unavailable, so the message is not repeated
    # for every single row of the table.
    reported_missing = set()
    for i, (_, row) in enumerate(motifscan.iterrows(), 1):
        # Progress goes through sys.stdout (not os.write on fd 1) so that it
        # stays ordered with the print output; flushed at the end of the row.
        sys.stdout.write('\r%d/%d' % (i, row_num))
        motifscan_peak = MotifScanPeak(row['chr'], int(row['start']), int(row['end']), int(row['summit']))
        target_number_list = []
        for name in motifs:
            try:
                target_number = int(row[name + '.tarnum'])
            except (KeyError, ValueError, TypeError):
                # None marks that no target number is available for this
                # motif. Only lookup/conversion failures are handled here, so
                # KeyboardInterrupt and SystemExit are no longer swallowed.
                target_number = None
                if name not in reported_missing:
                    reported_missing.add(name)
                    print('%s not exist in the motifscan result!' % name)
            target_number_list.append(target_number)
        motifscan_peak.set_motif_info(motifs, target_number_list)
        motifscan_peak_list.append(motifscan_peak)
        sys.stdout.flush()
    print('\nend!')
    return motifscan_peak_list